package util;

import java.util.Locale;
import java.util.StringTokenizer;

/**
 * Text normalization helper: turns free text into a space-separated sequence of
 * accent-free, stemmed tokens with stop words removed.
 */
public class Normalizer {

  /**
   * Normalizes the content token by token.
   *
   * <p>The input is split on spaces and on the characters in {@code Util.PATTERN_SIGNS}.
   * Each token has its accents removed via {@code Util.removeAccent}, is case-folded to
   * lower case, and is dropped if {@code Util.isStopWord} accepts it. The surviving tokens
   * are stemmed with {@code Stemmer.stem}, passed through {@code Util.removeAccent} once
   * more, and joined with single spaces.
   *
   * <p>NOTE(review): the original documentation states that three stemmers (Portuguese,
   * Spanish and English) are applied; this method only delegates to {@code Stemmer.stem},
   * so confirm that against the stemmer implementation.
   *
   * <p>Case conversion uses {@link Locale#ROOT} so the result does not depend on the JVM
   * default locale (for example, under a Turkish locale {@code "I"} would otherwise
   * lower-case to a dotless {@code "ı"} and no longer match stop words or stem rules).
   *
   * @param pContent the text to normalize; {@code null} is treated as empty text
   * @throws Exception declared because the helper calls may fail; which helper throws is
   *     not visible from this class
   * @return the normalized tokens separated by single spaces, without leading or trailing
   *     whitespace; the empty string when no token survives
   * @see info.iskmm.analysis.stemmer.PortugueseStemmer
   * @see info.iskmm.analysis.stemmer.SpanishStemmer
   * @see info.iskmm.analysis.stemmer.EnglishStemmer
   */
  public static String getNormalizedContent(String pContent) throws Exception {
    if (pContent == null) {
      // Nothing to tokenize; avoids a NullPointerException inside StringTokenizer.
      return "";
    }

    StringTokenizer st = new StringTokenizer(pContent, " " + Util.PATTERN_SIGNS);
    // Method-local accumulator, so the unsynchronized builder is sufficient.
    StringBuilder result = new StringBuilder();

    while (st.hasMoreTokens()) {
      // The upper-case then lower-case round trip is kept from the original on purpose:
      // it is not the same as a plain toLowerCase for characters whose upper-case form
      // expands (e.g. "ß" -> "SS" -> "ss").
      String token = Util.removeAccent(st.nextToken())
          .toUpperCase(Locale.ROOT)
          .toLowerCase(Locale.ROOT);

      if (!Util.isStopWord(token)) {
        // Stemming may reintroduce accented characters, hence the second removeAccent.
        // NOTE(review): that rationale is an assumption; the code only shows the call.
        result.append(Util.removeAccent(Stemmer.stem(token))).append(' ');
      }
    }

    // Drops the trailing separator added after the last kept token.
    return result.toString().trim();
  }

}
